Each group member must uniquely complete this assignment for the group to get credit for this homework. If only some members of the group complete the assignment, they must each submit a supplemental explanation along with their submission as to why the other students in their group have not completed this assignment.
As shown in Pirates_Snakes.Rmd and Pirates_Snakes.ipynb (from Class 5) and in the code below (Class 6), some ways to find data are:
data() will show you data sets available in R
Pirates_Snakes.Rmd (Class 6), e.g., data("USArrests")
.csv or other tabular “non built-in” data source
.csv or other tabular data format and THEN reading it into R (to get credit for the last 10%)
size and color attributes in your graphic
“hwy versus displ” plots here:
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, color = class)) # (colored by class)
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, size = class)) # (sized by class)
facets feature of ggplot2
scale_colour_manual(values=cbbPalette) and cbbPalette <- c("#009E73", "#CC79A7","#D55E00", "#000000","#56B4E9", "#F0E442", "#0072B2", "#E69F00") is described here.
Hint: you’ll need 3 factor variables and two continuous variables to do this, though continuous variables can be discretized to produce factor level variables.
library(tidyverse)
# Read the two CSV inputs with readr.
# https://readr.tidyverse.org/reference/read_delim.html
d1 <- "/Users/gck8gd/Documents/courses/SDS_3003_Communication_w_Data/primary_results.csv"
d2 <- "/Users/gck8gd/Documents/courses/SDS_3003_Communication_w_Data/county_facts.csv"
primary_results <- readr::read_csv(d1)
county_facts <- readr::read_csv(d2)

# Build a `county` key by removing the first ' County' match from `area_name`,
# then inner-join the county facts to the primary results on state
# abbreviation and county, keeping only rows that match in both tables.
# https://stringr.tidyverse.org/reference/str_replace.html
# https://dplyr.tidyverse.org/reference/join.html
county_factsANDprimaries <- dplyr::inner_join(
  dplyr::mutate(
    county_facts,
    county = stringr::str_replace(area_name, ' County', '')
  ),
  primary_results,
  by = c('state_abbreviation', 'county')
)
# ggplot function
# Create a plot object bound to the joined data. Only `data` is supplied here:
# no aesthetic mappings and no layers have been added yet.
my_first_ggplot <- ggplot(data=county_factsANDprimaries)
# Auto-print the plot object to render it.
my_first_ggplot
# ggplot parts
# List the names of the components stored in the plot object.
ls(my_first_ggplot)
## [1] "coordinates" "data" "facet" "labels" "layers"
## [6] "mapping" "plot_env" "scales" "theme"
# ggplot data
# Extract the `data` component of the plot object (the joined tibble that was
# passed to ggplot()).
my_first_ggplot[['data']]
## # A tibble: 17,028 x 61
## fips.x area_name state_abbreviat… PST045214 PST040210 PST120214 POP010210
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1001 Autauga … AL 55395 54571 1.5 54571
## 2 1001 Autauga … AL 55395 54571 1.5 54571
## 3 1001 Autauga … AL 55395 54571 1.5 54571
## 4 1001 Autauga … AL 55395 54571 1.5 54571
## 5 1001 Autauga … AL 55395 54571 1.5 54571
## 6 1001 Autauga … AL 55395 54571 1.5 54571
## 7 1001 Autauga … AL 55395 54571 1.5 54571
## 8 1003 Baldwin … AL 200111 182265 9.8 182265
## 9 1003 Baldwin … AL 200111 182265 9.8 182265
## 10 1003 Baldwin … AL 200111 182265 9.8 182265
## # … with 17,018 more rows, and 54 more variables: AGE135214 <dbl>,
## # AGE295214 <dbl>, AGE775214 <dbl>, SEX255214 <dbl>, RHI125214 <dbl>,
## # RHI225214 <dbl>, RHI325214 <dbl>, RHI425214 <dbl>, RHI525214 <dbl>,
## # RHI625214 <dbl>, RHI725214 <dbl>, RHI825214 <dbl>, POP715213 <dbl>,
## # POP645213 <dbl>, POP815213 <dbl>, EDU635213 <dbl>, EDU685213 <dbl>,
## # VET605213 <dbl>, LFE305213 <dbl>, HSG010214 <dbl>, HSG445213 <dbl>,
## # HSG096213 <dbl>, HSG495213 <dbl>, HSD410213 <dbl>, HSD310213 <dbl>,
## # INC910213 <dbl>, INC110213 <dbl>, PVY020213 <dbl>, BZA010213 <dbl>,
## # BZA110213 <dbl>, BZA115213 <dbl>, NES010213 <dbl>, SBO001207 <dbl>,
## # SBO315207 <dbl>, SBO115207 <dbl>, SBO215207 <dbl>, SBO515207 <dbl>,
## # SBO415207 <dbl>, SBO015207 <dbl>, MAN450207 <dbl>, WTN220207 <dbl>,
## # RTN130207 <dbl>, RTN131207 <dbl>, AFN120207 <dbl>, BPS030214 <dbl>,
## # LND110210 <dbl>, POP060210 <dbl>, county <chr>, state <chr>, fips.y <dbl>,
## # party <chr>, candidate <chr>, votes <dbl>, fraction_votes <dbl>
# aes aesthetic mappings
# Extract the plot-level aesthetic mapping; no aes() has been supplied yet,
# so it prints as empty.
my_first_ggplot[['mapping']]
## Aesthetic mapping:
## <empty>
# Rebuild the plot object, this time with a plot-level aesthetic mapping:
# PST045214 on the x axis and fraction_votes on the y axis.
my_first_ggplot <- ggplot(
  data = county_factsANDprimaries,
  mapping = aes(x = PST045214, y = fraction_votes)
)

# The mapping component now lists the two aesthetics.
my_first_ggplot[['mapping']]
## Aesthetic mapping:
## * `x` -> `PST045214`
## * `y` -> `fraction_votes`

# Auto-print the plot; no geom layer has been added at this point.
my_first_ggplot
# geom rendering
# Add a point layer to the plot. The layer repeats the same x/y mapping that
# is already set at the plot level, so the points use PST045214 vs
# fraction_votes.
my_first_rendered_ggplot <- my_first_ggplot +
  geom_point(mapping=aes(x=PST045214, y=fraction_votes))
# Auto-print the plot with its point layer.
my_first_rendered_ggplot
# Replace the default x scale with a log10 scale.
# https://r4ds.had.co.nz/graphics-for-communication.html#scales
# https://r4ds.had.co.nz/graphics-for-communication.html#replacing-a-scale
my_first_rendered_ggplot + scale_x_log10()

# Alternatives for a scatterplot with too many points.
# https://stackoverflow.com/questions/7714677/scatterplot-with-too-many-points
# Plain points on the log10 x scale.
my_first_ggplot + geom_point() + scale_x_log10()
# Semi-transparent points.
my_first_ggplot + geom_point(alpha=0.1) + scale_x_log10()
# Larger semi-transparent points with the outline stroke set to 0.
my_first_ggplot + geom_point(alpha=0.1, size=2.5, stroke=0) + scale_x_log10()
# Hexagonal bins instead of individual points.
my_first_ggplot + geom_hex() + scale_x_log10()
# 2D density contour lines.
my_first_ggplot + scale_x_log10() + geom_density_2d()
# Hexagonal bins with density contours drawn on top.
my_first_ggplot + geom_hex() + geom_density_2d() + scale_x_log10()

# Same as above, with the continuous viridis palette for the fill.
# https://ggplot2.tidyverse.org/reference/scale_viridis.html
my_first_ggplot + geom_hex() + geom_density_2d() + scale_x_log10() +
  scale_fill_viridis_c()

# Raster of the computed 2D density (fill mapped to the stat's `density`
# variable) with tiny white points drawn over it. after_stat() replaces the
# deprecated stat() helper for referring to computed variables.
my_first_ggplot + scale_fill_viridis_c() + scale_x_log10() +
  stat_density_2d(aes(fill=after_stat(density)), geom='raster', contour=FALSE) +
  coord_cartesian(expand=FALSE) +
  geom_point(color='white', shape='.', size=0.0001, alpha=0.1)
# aes options
# Map further aesthetics at the plot level: POP060210 drives both point size
# and transparency, party drives colour, and x/y are PST045214 vs
# fraction_votes.
my_second_ggplot <- ggplot(data=county_factsANDprimaries,
                           mapping=aes(size=POP060210, alpha=POP060210,
                                       color=party,
                                       x=PST045214, y=fraction_votes))
# Draw the points on a log10 x scale.
my_second_ggplot + geom_point() + scale_x_log10()
# Give the plotted columns human-readable names so the axis and legend titles
# read well without extra labelling.
county_factsANDprimaries <- dplyr::rename(
  county_factsANDprimaries,
  `County Population` = PST045214,
  `County Density (per square mile)` = POP060210,
  Party = party,
  `Fraction of Votes` = fraction_votes
)

# Same plot as before, now referring to the renamed columns.
my_second_ggplot <- ggplot(
  data = county_factsANDprimaries,
  mapping = aes(
    size = `County Density (per square mile)`,
    alpha = `County Density (per square mile)`,
    color = Party,
    x = `County Population`,
    y = `Fraction of Votes`
  )
)
my_second_ggplot + geom_point() + scale_x_log10()
# Add a `Candidate` column in which the first space of `candidate` is replaced
# by a newline, so the facet strip labels wrap onto two lines.
county_factsANDprimaries <- dplyr::mutate(
  county_factsANDprimaries,
  Candidate = stringr::str_replace(candidate, ' ', '\n')
)

# Swap the roles of population and density: population drives size and
# transparency, density goes on the x axis.
my_third_ggplot <- ggplot(
  data = county_factsANDprimaries,
  mapping = aes(
    size = `County Population`,
    alpha = `County Population`,
    color = Party,
    x = `County Density (per square mile)`,
    y = `Fraction of Votes`
  )
)

# One panel per candidate, arranged in two rows, on a log10 x scale.
my_third_ggplot +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ Candidate, nrow = 2)
# Faceted plot with flipped coordinates: four rows of panels sharing fixed
# scales, the hue palette direction reversed for the colour scale, and
# explicit breaks on the vote-fraction axis. The axis guide dodges labels over
# two rows and drops overlapping ones.
# https://community.rstudio.com/t/x-axis-labels-overlap-want-to-rotate-labels-45/63800
# http://www.sthda.com/english/wiki/ggplot2-axis-ticks-a-guide-to-customize-tick-marks-and-labels
my_third_ggplot +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~ Candidate, nrow = 4, scales = 'fixed') +
  scale_color_hue(direction = -1) +
  scale_y_continuous(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1.0),
    guide = guide_axis(n.dodge = 2, check.overlap = TRUE)
  ) +
  coord_flip()